package cn.itcast;

import us.codecraft.webmagic.Page;

import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * WebMagic {@link PageProcessor} that starts at the CSDN blog home page,
 * queues every article-detail link found on each visited page, and prints
 * one navigation-bar element of that page to standard output.
 */
public class MyProcessor implements PageProcessor {

    /**
     * Pattern for CSDN article-detail URLs:
     * {@code https://blog.csdn.net/<user>/article/details/<id>}.
     *
     * <ul>
     *   <li>Dots in the host name are escaped so they only match a literal dot.</li>
     *   <li>The user segment accepts letters, digits, {@code _} and {@code -};
     *       spaces are not accepted because they cannot occur in a valid URL path.</li>
     *   <li>The article id is any run of digits. The pattern is not anchored, so a
     *       fixed-length quantifier such as {@code {8}} would cut a longer id short
     *       and yield a wrong URL.</li>
     * </ul>
     */
    private static final String ARTICLE_URL_REGEX =
            "https://blog\\.csdn\\.net/[a-zA-Z0-9_-]+/article/details/[0-9]+";

    /** XPath of the fifth navigation entry's anchor inside the {@code nav_com} element. */
    private static final String NAV_ITEM_XPATH = "//*[@class=\"nav_com\"]/ul/li[5]/a";

    /**
     * Handles one downloaded page: schedules all article links it contains for
     * crawling, then prints the selected navigation element followed by a label line.
     *
     * @param page the page handed over by the spider
     */
    @Override
    public void process(Page page) {
        // Queue every link on the page that looks like an article-detail URL.
        page.addTargetRequests(page.getHtml().links().regex(ARTICLE_URL_REGEX).all());

        System.out.println(page.getHtml().xpath(NAV_ITEM_XPATH));
        System.out.println("新的爬取结果:");
    }

    /**
     * Supplies the crawl settings used by the spider.
     *
     * @return a site configuration with sleep time 100 and 3 retries
     */
    @Override
    public Site getSite() {
        return Site.me().setSleepTime(100).setRetryTimes(3);
    }

    /**
     * Entry point: runs a spider with this processor, seeded with the CSDN blog home page.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Spider.create(new MyProcessor()).addUrl("https://blog.csdn.net").run();
    }
}
